Load data

source("mergeAllData.R", local = knitr::knit_global())
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
## here() starts at /Users/amelia/github/MHWsim-expt
## Rows: 1425 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): genet
## dbl  (7): tank, avg_size, sd_size, avg_biomass_g, sd_biomass_g, n_polyps_sam...
## date (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3000 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Date, Genet, Comments
## dbl  (7): Tank, Alive, Dying/Dead, Removed, Floating, N, Splitting
## time (1): Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2925 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Date, Genet, Comments
## dbl  (5): Tank, Open, Partial open, Partial closed, Closed
## time (1): Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#source("readRPiData.R", local = knitr::knit_global()) # Load temperature data AFTER you have uncommented and run this line
weekly_temp <- read_csv(here("experiment", "data", "weekly_temperature.csv"))
## Rows: 300 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): treatment
## dbl  (4): tank, avg_temp, min_temp, max_temp
## date (1): friday
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Make matrices

Make A matrix - genet ID (Five genets: A, B, C, D, E)

A_matrix <- matrix(rep(c("A", "B", "C", "D", "E"), times = 15), nrow = 75, ncol = 1)

Make C, c matrices - temperature covariate (15 tanks, 5 genets in each tank)

temp_covariate <- weekly_temp %>%
  filter(friday < ymd("2024-02-16")) %>%
  arrange(friday, tank) %>%
  slice(rep(1:n(), each = 5)) %>%
  mutate(tank_rep = paste0(tank, letters[1:5][rep(1:5, times = n()/5)])) %>%
  select(tank_rep, friday, avg_temp) %>%
  pivot_wider(names_from = tank_rep, values_from = avg_temp) %>%
  mutate(week = row_number()) %>%
  select(-friday) %>%
  column_to_rownames(var = "week") %>%
  as.matrix() %>%
  t()

C_matrix <- matrix(factor(rep(1:15, each = 5)), ncol = 1)

Make Z matrix list. Might need to incorporate that # cols = # treatments, because then I need to change C matrix dimensions and resolve underconstrained issues with Z, B matrices

Z_models <- list(
  H1 = matrix(factor(rep(1, 75))),
  H2 = matrix(factor(c(rep("amb", 25), rep("sev", 25), rep("ext", 25)))),
  H3 = matrix(factor(rep(c("A", "B", "C", "D", "E"), 15))),
  H4 = matrix(factor(c(rep("amb", 25), rep("mhw", 50)))),
  H5 = matrix(factor(c(rep("amb", 25), rep("sev", 25), rep("amb", 25)))), # post-hoc hypothesis
  H6 = matrix(factor(rep(c("gen1", "gen2", "gen2", "gen2", "gen1"), 15))), # post-hoc hypothesis
  H7 = matrix(factor(rep(c("gen1", "gen2", "gen3", "gen4", "gen1"), 15))), # post-hoc hypothesis
  H8 = matrix(factor(c(rep(c("amb_gen1", "amb_gen2", "amb_gen2", "amb_gen2", "amb_gen1"), 5),
                     rep(c("sev_gen1", "sev_gen2", "sev_gen2", "sev_gen2", "sev_gen1"),5),
                     rep(c("amb_gen1", "amb_gen2", "amb_gen2", "amb_gen2", "amb_gen1"), 5))))) # post-hoc hypothesis
  #H9 = matrix(factor(1:75)) #Do not use! Overfits the data and has by far the worst AIC 

names(Z_models) <- c("all_same", "diff_treat", "diff_genet", "mhw_same", "amb_ext_same", "AE_BCD", "AE_B_C_D", "ambExt_AE_BCD") #, "all_diff")

Hypotheses

  • H1: All genets and all treatments are the same
  • H2: All genets are the same but each treatment is different
  • H3: All treatments are the same but each genet is different
  • H4: All genets are the same and both MHW treatments are the same, but different from control treatment
  • H5: All genets are the same and the extreme MHW is the same as the control treatment (post-hoc hypothesis, - based off figures)
  • H6: All treatments are the same, and genets A/E are the same and genets B/C/D are the same (post-hoc - hypothesis, based off figures)
  • H7: All treatments are the same, and genets A/E are the same (post-hoc hypothesis, alternative to H6)
  • H8: The exterme MHW is the same as the control treatment, and genets A/E are the same and genets B/C/D are the same (post-hoc hypothesis, alternative to H5:H8)
  • H9: (Do not use!) All genets and all treatments are different

————– Notes from WSN 2024:

  • C matrix - all numeric matrix, 1 column -> This doesn’t work though, and needs to be same dimensions as c matrix
  • c matrix = temp covariate
  • B = Identity, random walk
  • Z = CUSTOM matrix, the hypotheses
  • A = A_matrix?
  • R = diagonal and equal #identity?
  • U = unequal?
  • Q = diagonal and equal?

Set up model list

mod_list <- list(
  B = "identity", #pretty sure this is right, random walk diagonal and unequal has worse AIC/BIC
  U = "zero", #unequal has worse AIC/BIC
  Q = "diagonal and equal", #All tanks have same mixing; random walk diagonal and unequal has worse AIC/BIC
  Z = "placeholder",
  A = A_matrix,
  R = "diagonal and equal", #errors are independent and equally distributed
  C = C_matrix,
  c = temp_covariate)

Body size

Prep data

all_size <- all %>%
  filter(!is.na(avg_size)) %>%
  mutate(week = as.numeric(difftime(date, min(weekly_temp$friday), units = "weeks")) + 1,
         avg_size_log = log(avg_size),
         sd_size_log = log(sd_size),
         tank = as.numeric(tank)) %>%
  select(date, week, tank, mhw, genet, avg_size_log) %>%
  left_join(weekly_temp, by = c("date" = "friday", "tank"))

all_size_marss <- all_size %>%
  mutate(genet_tank = paste(genet, tank, sep = "_")) %>%
  select(-date, -tank, -genet, -mhw, -treatment, -avg_temp, -min_temp, -max_temp) %>%
  pivot_wider(names_from = genet_tank, values_from = avg_size_log) %>%
  column_to_rownames(var = "week") %>%
  as.matrix() %>%
  t()

marss_data <- all_size_marss

Loop through Z matrix models

out.tab <- NULL
fits <- list()

for (i in 1:length(Z_models)) {
    mod_list$Z <- Z_models[[i]]
    fit <- MARSS(y=marss_data, model = mod_list, silent = TRUE, 
        control = list(maxit = 5000))
    out <- data.frame(H = names(Z_models)[i], logLik = fit$logLik, 
        AICc = fit$AICc, num.param = fit$num.params, m = length(unique(Z_models[[i]])), 
        num.iter = fit$numIter, converged = !fit$convergence)
    out.tab <- rbind(out.tab, out)
    fits <- c(fits, list(fit))
}

print(out.tab) #looking for: higher log likelihood, lower AIC
##               H   logLik      AICc num.param m num.iter converged
## 1      all_same 1588.865 -3128.874        24 1       61      TRUE
## 2    diff_treat 1635.665 -3218.325        26 3       61      TRUE
## 3    diff_genet 1616.143 -3175.123        28 5      303      TRUE
## 4      mhw_same 1623.802 -3196.676        25 2       61      TRUE
## 5  amb_ext_same 1623.543 -3196.156        25 2       61      TRUE
## 6        AE_BCD 1602.623 -3154.317        25 2      274      TRUE
## 7      AE_B_C_D 1615.734 -3176.385        27 4      303      TRUE
## 8 ambExt_AE_BCD 1642.047 -3229.011        27 4      271      TRUE
for (i in 1:length(fits)) {
  par(mfrow = c(2, 3))
  resids <- MARSSresiduals(fits[[i]], type = "tt1")
  for (j in 1:15) {
    plot(resids$model.residuals[j, ], ylab = "model residuals", 
        xlab = "")
    abline(h = 0)
    title(paste(names(Z_models[i]), ":", rownames(marss_data)[j]))
    }
}

# Plots 1 (xtT) & 2 (fitted.ytT): Do fitted values seem reasonable?
# Plot 3 (model.resids.ytt1): Do resids have temporal patterns? Do 95% of resids fall withing the CIs?
# Plot 4 (std.model.resids.ytT): Do resids have temporal patterns? Do 95% of resids fall withing the CIs?
# Plot 5 (std.state.resids.xtT): Any outliers?
# Plots 6 & 7 (qqplot.std.model.resids.ytt1: Are resids normal (straight line)?
# Plot 8 (acf.std.model.resids.ytt1): Do resids have temporal autocorrelation?

The residual plots are not looking great… and they’re essentially the same across all models(??)

Generate the z-scored data

the.mean <- apply(marss_data, 1, mean)
the.sigma <- sqrt(apply(marss_data, 1, var))
marss_data_z <- (marss_data - the.mean) * (1/the.sigma)

the.mean <- apply(temp_covariate, 1, mean)
the.sigma <- sqrt(apply(temp_covariate, 1, var))
temp_covariate_z <- (temp_covariate - the.mean) * (1/the.sigma)

Model the z-scored data

mod_list_z <- list(
  B = "diagonal and equal", #identity is another option, but models have lower logLik and higher AICc; random walk diagonal and unequal gives same model output
  U = "zero", #note: unequal doesn't converge even after 5k iterations
  Q = "diagonal and equal", #All tanks have same mixing (also, random walk diagonal and unequal is the same AICc and very close Q estimates)
  Z = "placeholder",
  A = "zero", #because data have been z-scored
  R = "diagonal and equal", #observation errors are independent and have equal variances, because there was only ONE sampler (me)
  C = C_matrix,
  c = temp_covariate_z)

out_tab_z <- NULL
fits_z <- list()
for (i in 1:length(Z_models)) {
    mod_list_z$Z <- Z_models[[i]]
    fit <- MARSS(y=marss_data_z, model = mod_list_z, silent = TRUE, 
        control = list(maxit = 5000))
    #print(fit)
    out <- data.frame(H = names(Z_models)[i], logLik = fit$logLik, 
        AICc = fit$AICc, num.param = fit$num.params, m = length(unique(Z_models[[i]])), 
        num.iter = fit$numIter, converged = !fit$convergence)
    out_tab_z <- rbind(out_tab_z, out)
    fits_z <- c(fits_z, list(fit))
}

print(out_tab_z) #looking for: higher log likelihood, lower AIC
##               H   logLik      AICc num.param m num.iter converged
## 1      all_same 150.9305 -261.2628        20 1      178      TRUE
## 2    diff_treat 151.1195 -257.5173        22 3      178      TRUE
## 3    diff_genet 151.0769 -253.2966        24 5      178      TRUE
## 4      mhw_same 150.9444 -259.2301        21 2      178      TRUE
## 5  amb_ext_same 151.1080 -259.5573        21 2      178      TRUE
## 6        AE_BCD 151.0703 -259.4820        21 2      178      TRUE
## 7      AE_B_C_D 151.0731 -255.3581        23 4      178      TRUE
## 8 ambExt_AE_BCD 151.2521 -255.7161        23 4      178      TRUE
for (i in 1:length(fits_z)) {
  par(mfrow = c(2, 3))
  resids_z <- MARSSresiduals(fits_z[[i]], type = "tt1")
  for (j in 1:15) {
    plot(resids_z$model.residuals[j, ], ylab = "model residuals", 
        xlab = "")
    abline(h = 0)
    title(paste(names(Z_models[i]), ":", rownames(marss_data_z)[j]))
    }
}

The residual plots are looking better… but they’re still essentially the same across all models(??)

According to AICc (and logLik), definitely don’t use “all_diff” and can probably also rule out “diff_genet”

Try z-scored data with different matrices?

mod_list_z <- list(
  B = "diagonal and unequal", #identity is another option, but models have lower logLik and higher AICc; random walk diagonal and unequal gives same model output
  U = "zero", #note: unequal doesn't converge even after 5k iterations
  Q = "diagonal and equal", #All tanks have same mixing (also, random walk diagonal and unequal is the same AICc and very close Q estimates)
  Z = "placeholder",
  A = "zero", #because data have been z-scored
  R = "diagonal and equal", #observation errors are independent and have equal variances, because there was only ONE sampler (me)
  C = C_matrix,
  c = temp_covariate_z)

out_tab_z <- NULL
fits_z <- list()
for (i in 1:length(Z_models)) {
    mod_list_z$Z <- Z_models[[i]]
    fit <- MARSS(y=marss_data_z, model = mod_list_z, silent = TRUE, 
        control = list(maxit = 5000))
    #print(fit)
    out <- data.frame(H = names(Z_models)[i], logLik = fit$logLik, 
        AICc = fit$AICc, num.param = fit$num.params, m = length(unique(Z_models[[i]])), 
        num.iter = fit$numIter, converged = !fit$convergence)
    out_tab_z <- rbind(out_tab_z, out)
    fits_z <- c(fits_z, list(fit))
}

print(out_tab_z) #looking for: higher log likelihood, lower AIC
##               H   logLik      AICc num.param m num.iter converged
## 1      all_same 150.9305 -261.2628        20 1      178      TRUE
## 2    diff_treat 151.1195 -257.5173        22 3      178      TRUE
## 3    diff_genet 151.0769 -253.2966        24 5      178      TRUE
## 4      mhw_same 150.9444 -259.2301        21 2      178      TRUE
## 5  amb_ext_same 151.1080 -259.5573        21 2      178      TRUE
## 6        AE_BCD 151.0703 -259.4820        21 2      178      TRUE
## 7      AE_B_C_D 151.0731 -255.3581        23 4      178      TRUE
## 8 ambExt_AE_BCD 151.2521 -255.7161        23 4      178      TRUE
for (i in 1:length(fits_z)) {
  par(mfrow = c(2, 3))
  resids_z <- MARSSresiduals(fits_z[[i]], type = "tt1")
  for (j in 1:15) {
    plot(resids_z$model.residuals[j, ], ylab = "model residuals", 
        xlab = "")
    abline(h = 0)
    title(paste(names(Z_models[i]), ":", rownames(marss_data_z)[j]))
    }
}

Visualize z-scored temperatures

temp_z <- weekly_temp %>%
  filter(friday < ymd("2024-02-16")) %>%
  arrange(friday, tank) %>%
  select(tank, friday, avg_temp) %>%
  group_by(tank) %>%
  mutate(avg_temp_z = (avg_temp - mean(avg_temp, na.rm = TRUE)) /sd(avg_temp, na.rm = TRUE),
         treatment = ifelse(tank < 6, "ambient",
                            ifelse(tank > 10, "extreme",
                                   "severe")),
         tank = as.factor(tank)) %>%
  ungroup()

ggplot(temp_z, aes(x = friday, y = avg_temp_z, color = tank)) +
  geom_line() +
  labs(title = "Z-scored average temperature by tank",
       x = "Date",
       y = "Z-scored average temperature") +
  theme_minimal() +
  facet_wrap(~treatment)

Do not run….

Experiment with non 75x1 Z matrix dimensions

#Each treatment different
Z_matrix2 <- matrix(factor(c(rep(c(1, 0, 0), 25),
                            rep(c(0, 1, 0), 25),
                            rep(c(0, 0, 1), 25))),
                   nrow = 75, byrow = TRUE)

# Combine the blocks into a single 75x3 matrix
matrix_data <- rbind(matrix(c(rep(1:5, each = 5), rep(0, 25), rep(0, 25)), nrow = 25, ncol = 3, byrow = FALSE),
                     matrix(c(rep(0, 25), rep(6:10, each = 5), rep(0, 25)), nrow = 25, ncol = 3, byrow = FALSE),
                     matrix(c(rep(0, 25), rep(0, 25), rep(10:14, each = 5)), nrow = 25, ncol = 3, byrow = FALSE))

C_matrix2 <- matrix(factor(rep(1:15, each = 5)), ncol = 3)

C_matrix2 <- matrix(0, nrow = 75, ncol = 3)
for (i in 1:3) {
  C_matrix2[((i-1)*5 + 1):(i*5), i] <- 1
}
C_matrix2 <- matrix(factor(C_matrix2), nrow = 75, ncol = 3)

model_list <- list(
  B = "identity",
  U = "zero",
  Q = "diagonal and equal",
  Z = Z_matrix2,
  A = A_matrix,
  R = "diagonal and equal",
  C = matrix_data,
  c = temp_covariate)

# non 75x1 Z matrix Does not run 
#fit1 <- MARSS(y=marss_data, model = model_list,
#              control = list(maxit = 1000))